# #Enns, Kelly, Masaki, and Wohlfarth (EKMW)
# #The following code is identical to that produced by Lebo and Kraft excpet
# #where explicitly noted.
# This simulation code is based on Lebo/Kraft's (2017) replication code for "The General Error Correction Model in Practice" (Research & Politics).
# Our simulations demonstrate how issues of spurious results reported in Figure 1(a)-1(c) are due to LK's inappropriate choice of a lag length for the ADF test.
# We show that when we carefully evaluate an appropriate lag length, the rate of falsely concluding the presence of cointegration is almost always below 5%.

## Set working directory
setwd("")
##EKMW create a folder in the wd to store output files
dir.create("outputs")

## Install and load all required packages
pkg <- c("foreign","tidyverse","fracdiff","tseries","gridExtra")
inst <- pkg %in% installed.packages()
if(length(pkg[!inst]) > 0) install.packages(pkg[!inst])
invisible(lapply(pkg, library, character.only = TRUE))
rm(list = ls())

## Load custom functions
## EKMW load their source file (see file for changes from Lebo and Kraft)
source("func_ekmw_RAP.R")

### Figure 1(a) - 1(c)
## Examine false negatives on ADF test and 
## false positives on alpha*_1 for unrelated series

## Simulate time series
set.seed(20160728)
res <- rbind(simDfAlpha(nt=50), simDfAlpha(nt=100), simDfAlpha(nt=250)
             , simDfAlpha(nt=50, par="rho"), simDfAlpha(nt=100, par="rho")
             , simDfAlpha(nt=250, par="rho"))

#######################################
## EKMW generate second set of results based on ADF with appropriate lag length
res_k0 <- rbind(simDfAlpha_k0(nt=50), simDfAlpha_k0(nt=100), simDfAlpha_k0(nt=250)
             , simDfAlpha_k0(nt=50, par="rho"), simDfAlpha_k0(nt=100, par="rho")
             , simDfAlpha_k0(nt=250, par="rho"))
#######################################

res$par <- factor(res$par, levels = c("rho","d"), labels=c("rho","italic(d)"))
## EKMW generate second set of results based on ADF with appropriate lag length
res_k0$par <- factor(res_k0$par, levels = c("rho","d"), labels=c("rho","italic(d)"))

## Compute 95% confidence intervals
qlo <- function(x) quantile(x, .025)
qhi <- function(x) quantile(x, .975)

## Compute MacKinnon critical values
res$mk_crit <- macKinnon(as.numeric(as.character(res$nt)))
## EKMW generate second set of results based on ADF with appropriate lag length
res_k0$mk_crit <- macKinnon(as.numeric(as.character(res_k0$nt)))

## False positives (alpha1_t < mk_crit given that DF fails to reject)
res$false_positive <- with(res, (alpha1_t < mk_crit) * (dfval >= -2.88)) 
## EKMW generate second set of results based on ADF with appropriate lag length
res_k0$false_positive <- with(res_k0, (alpha1_t < mk_crit) * (dfval >= -2.88)) 

## EKMW added new columns indicating whether we observe a statistically significant effect of x on y
res$lx_false_positive <- with(res, (alpha1_t < mk_crit)* (dfval >= -2.88) * (abs(lx_t) >= 1.96))
res_k0$lx_false_positive <- with(res_k0, (alpha1_t < mk_crit)* (dfval >= -2.88) * (abs(lx_t) >= 1.96))

##############################################
###EKMW save the data from all simulations
write.dta(res,"outputs/res.dta")
write.dta(res_k0,"outputs/res_k0.dta")

res <- read.dta("outputs/res.dta")
res_k0 <- read.dta("outputs/res_k0.dta")
##############################################

## Summarize parameter estimates
res2 <- res %>% group_by(nt, par, val) %>% summarise_each(funs(mean,qlo,qhi))

## Figure 1(a)
ggplot(res2[res2$par=="rho",], aes(x=alpha1_mean, y=dfval_mean)) + 
  geom_point(aes(size=val, shape=nt)) + geom_hline(yintercept=-2.88) + 
  geom_errorbar(aes(ymin = dfval_qlo, ymax = dfval_qhi)) + 
  theme_classic() + theme(panel.border = element_rect(fill=NA)) + 
  xlab("Mean value of ECM parameter") +
  ylab("Mean value and 95% intervals for DF test statistic") +
  scale_shape_discrete(name="Number of\nTime Points") +
  scale_size_continuous(name = expression(paste("\u03C1", " Value"))
                        , breaks = seq(0,.9,.1))
ggsave("outputs/fig1a.png",height=5,width=7, dpi = 300)

## Figure 1(b)
ggplot(res2[res2$par=="italic(d)",], aes(x=alpha1_mean, y=dfval_mean)) + 
  geom_point(aes(size=val, shape=nt)) + geom_hline(yintercept=-2.88) + 
  geom_errorbar(aes(ymin = dfval_qlo, ymax = dfval_qhi)) + 
  theme_classic() + theme(panel.border = element_rect(fill=NA)) + 
  xlab("Mean value of ECM parameter") +
  ylab("Mean value and 95% intervals for DF test statistic") +
  scale_shape_discrete(name="Number of\nTime Points") +
  scale_size_continuous(name = expression(paste(italic("d"), " Value"))
                        , breaks = seq(0,.9,.1)) +
  guides(shape = guide_legend(order=1), size = guide_legend(order=2))
ggsave("outputs/fig1b.png",height=5,width=7, dpi = 300)

## Figure 1(c)
res %>% group_by(par, val, nt) %>% 
  summarize(mean = mean(false_positive)) %>%
  ggplot(aes(y=val, x=nt, fill = mean)) + 
  geom_tile() + geom_text(aes(label=mean)) +
  facet_wrap(~par, labeller = label_parsed) +
  theme_classic() + theme(panel.border = element_rect(fill=NA)) +
  xlab("Number of Time Points") +
  ylab(expression(paste("Value of \u03C1 / ", italic("d")))) +
  scale_fill_gradient2(limits=c(0,.5), low="white", high="grey40"
                       ,name="Proportion of\nFalse Positives") +
ggsave("outputs/fig1c.png",height=5,width=7, dpi = 300)

##############################################
## EKMW show the rate of falsely finding a statistically significant effect of x on y after testing for integration and cointegration
## Figure 1(d)
res %>% group_by(par, val, nt) %>% 
  summarize(mean = mean(lx_false_positive)) %>%
  ggplot(aes(y=val, x=nt, fill = mean)) + 
  geom_tile() + geom_text(aes(label=mean)) +
  facet_wrap(~par, labeller = label_parsed) +
  theme_classic() + theme(panel.border = element_rect(fill=NA)) +
  xlab("Number of Time Points") +
  ylab(expression(paste("Value of \u03C1 / ", italic("d")))) +
  scale_fill_gradient2(limits=c(0,.5), low="white", high="grey40"
                       ,name="Proportion of\nFalse Positives") +
  ggsave("outputs/fig1d.png",height=5,width=7, dpi = 300)
##############################################

## Compute additional information about simulations mentioned in text
res %>% filter(nt==50) %>% group_by(par, val, nt) %>% 
  summarize(mean_alpha = mean(alpha1), df_falseneg=mean(dfval>=-2.93))

################################################################################
################################################################################
## EKMW replicate figure 1(c)-1(d) based on simulation results where we sequentially add lags in the ADF test
## Summarize parameter estimates
res2_k0 <- res_k0 %>% group_by(nt, par, val) %>% summarise_each(funs(mean,qlo,qhi))

## Figure 1(c)
res_k0 %>% group_by(par, val, nt) %>% 
  summarize(mean = mean(false_positive)) %>%
  ggplot(aes(y=val, x=nt, fill = mean)) + 
  geom_tile() + geom_text(aes(label=mean)) +
  facet_wrap(~par, labeller = label_parsed) +
  theme_classic() + theme(panel.border = element_rect(fill=NA)) +
  xlab("Number of Time Points") +
  ylab(expression(paste("Value of \u03C1 / ", italic("d")))) +
  scale_fill_gradient2(limits=c(0,.5), low="white", high="grey40"
                       ,name="Proportion of\nFalse Positives") +
  ggsave("outputs/fig1c_k0.png",height=5,width=7, dpi = 300)

## Figure 1(d)
res_k0 %>% group_by(par, val, nt) %>% 
  summarize(mean = mean(lx_false_positive)) %>%
  ggplot(aes(y=val, x=nt, fill = mean)) + 
  geom_tile() + geom_text(aes(label=mean)) +
  facet_wrap(~par, labeller = label_parsed) +
  theme_classic() + theme(panel.border = element_rect(fill=NA)) +
  xlab("Number of Time Points") +
  ylab(expression(paste("Value of \u03C1 / ", italic("d")))) +
  scale_fill_gradient2(limits=c(0,.5), low="white", high="grey40"
                       ,name="Proportion of\nFalse Positives") +
  ggsave("outputs/fig1d_k0.png",height=5,width=7, dpi = 300)

################################################################################
################################################################################
